rm(list = ls())

library(readxl)
library(tidyr)
library(dplyr)
library(lubridate)
library(ggplot2)
library(haven)
library(forcats)
library(ggrepel)
library(cowplot)
library(vdemdata)
library(countrycode)
library(scales)
library(sjstats)
library(survey)
library(Amelia)
library(vdemdata)
library(readr)
library(stringr)

path <- "/Users/eborbath/Library/CloudStorage/Dropbox-WZB/Endre Borbath/Papers/Environmental_protests/"

PEA <- read_csv(paste0(path, "data/PEA_2000_2021/PEA_integrated_2000_2021_V2_cleaner_longcov.csv")) %>% 
  select(-`...1`) %>%
    # mutate(countryname=as.character(as_factor(countryname))) %>%
    mutate(country_name = ifelse(country_name == "northern ireland", "united kingdom", country_name)) %>%
    mutate(country_iso = countryname(country_name, "iso2c")) %>%
    select(
        country_iso, year, date, weighted_event, weighted_part_all, contains("issue_"), contains("actor_"),
        action_form
    ) %>% 
  select(-covidissue_pro, -covidissue_anti, -actor_string1, -actor_string2) %>% 
  mutate(date2=ifelse(year==2021, date, as.character(NA))) %>% 
  mutate(date=ifelse(year==2021, as.character(NA), date)) %>% 
  mutate(date=ymd(date)) %>% 
  mutate(date2=dmy(date2)) %>% 
  mutate(date=ifelse(year==2021, date2, date)) %>% 
  mutate(date=as.Date(date)) %>% 
  select(-date2) %>%
  mutate(source = "PEA")

summary(PEA$date)
table(is.na(PEA$date))

# table(PEA$action_form[PEA$issue_environment == "yes"])

parlgov <- read_xlsx(paste0(path, "data/parlgov/parlgov_21042023.xlsx"), sheet = "election")

subelect <- parlgov %>%
    filter(election_type == "parliament") %>%
    mutate(green_seat_share = seats / seats_total) %>%
    select(country_name, country_id, party_id, party_name, vote_share, election_date, green_seat_share)

parlgov <- read_xlsx(paste0(path, "data/parlgov/parlgov_21042023.xlsx"), sheet = "party")

subparty <- parlgov %>%
    filter(family_name == "Green/Ecologist") %>%
    select(party_id, family_name)

parlgovgreen <- merge(subparty, subelect, all = TRUE)

rm(subparty, subelect, parlgov)

parlgovgreen <- parlgovgreen %>%
    mutate_at(vars(vote_share, green_seat_share), ~ ifelse(family_name == "Green/Ecologist", ., as.numeric(NA))) %>%
    select(-family_name) %>%
    mutate(election_year = as.numeric(format(as.Date(election_date, format = "%Y-%m-%d"), "%Y"))) %>%
    group_by(country_name, election_date) %>%
    mutate(
        vote_share = sum(vote_share, na.rm = TRUE),
        green_seat_share = sum(green_seat_share, na.rm = TRUE)
    ) %>%
    ungroup(.) %>%
    select(-party_name, -party_id) %>%
    unique(.) %>%
    filter(!is.na(country_name))

fullgridPG <- expand.grid(
    country_name = unique(parlgovgreen$country_name),
    election_year = seq(1999, 2021, 1)
)

parlgovgreen <- full_join(fullgridPG, parlgovgreen)

rm(fullgridPG)

parlgovgreen <- parlgovgreen %>%
    arrange(country_name, election_year, election_date) %>%
    group_by(country_name) %>%
    fill(country_id, .direction = "updown") %>%
    fill(election_date, .direction = "down") %>%
    group_by(country_name, election_date) %>%
    fill(green_seat_share, .direction = "down") %>%
    fill(vote_share, .direction = "down") %>%
    ungroup(.) %>%
    filter(election_year >= 1999) %>%
    filter(election_year <= 2021) %>%
    mutate(greenvote_share = vote_share) %>%
    select(-vote_share) %>%
    mutate_at(vars(green_seat_share, greenvote_share), ~ ifelse(is.na(.), 0, .))


parlgov_cabinet <- read_xlsx(paste0(path, "data/parlgov/parlgov_21042023.xlsx"), sheet = "cabinet")

parlgov_party <- read_xlsx(paste0(path, "data/parlgov/parlgov_21042023.xlsx"), sheet = "party") %>%
    select(party_id, family_name)

parlgov_cabinet <- left_join(parlgov_cabinet, parlgov_party, by = "party_id")

rm(parlgov_party)

parlgov_cabinet <- parlgov_cabinet %>%
    filter(cabinet_party == 1) %>%
    mutate(seat_share = seats / election_seats_total) %>%
    group_by(cabinet_id) %>%
    mutate(nr_cabinet_parties = n()) %>%
    mutate(cab_ideology = weighted.mean(left_right, w = seat_share, na.rm = TRUE)) %>%
    mutate(pm_ideology = sum(left_right * prime_minister, na.rm = TRUE)) %>%
    mutate(green_in_gov = ifelse(family_name == "Green/Ecologist", 1, 0)) %>%
    ungroup(.) %>%
    select(country_name, start_date, nr_cabinet_parties, cab_ideology, pm_ideology, green_in_gov) %>%
    distinct() %>%
    mutate(start_date = ymd(start_date)) %>%
    mutate(year = year(start_date)) %>%
    arrange(country_name, start_date) %>%
    group_by(country_name) %>%
    mutate(cab_length = start_date - dplyr::lag(start_date)) %>%
    group_by(country_name, year) %>%
    arrange(year, cab_length) %>%
    slice(1) %>%
    ungroup(.) %>%
    select(-start_date)

fullgridPG <- expand.grid(
    country_name = unique(parlgov_cabinet$country_name),
    year = seq(1999, 2021, 1)
)

parlgov_cabinet <- full_join(fullgridPG, parlgov_cabinet)

rm(fullgridPG)

parlgov_cabinet <- parlgov_cabinet %>%
    arrange(country_name, year) %>%
    group_by(country_name) %>%
    fill(nr_cabinet_parties, .direction = "down") %>%
    fill(cab_ideology, .direction = "down") %>%
    fill(pm_ideology, .direction = "down") %>%
    fill(green_in_gov, .direction = "down") %>%
    fill(cab_length, .direction = "down") %>%
    ungroup(.) %>%
    filter(year >= 1999) %>%
    filter(year <= 2021) %>%
    # group_by(country_name, year) %>%
    # mutate_at(vars(nr_cabinet_parties, cab_ideology, pm_ideology), ~ mean(., na.rm = TRUE)) %>%
    # ungroup() %>%
    distinct() %>%
    mutate(country_iso = countryname(country_name, "iso2c")) %>%
    select(-country_name)


Manifesto <- read.csv(paste0(path, "data/manifesto/MPDataset_MPDS2022a.csv"))

# per501 Environmental Protection
# per416 Anti-Growth Economy: Positive
# per410 Economic Growth: Positive

Manifestomod <- Manifesto %>%
    select(countryname, edate, pervote, party, partyname, parfam, per501, per416, per410) %>%
    mutate(parfam = as.character(parfam)) %>%
    mutate(parfamname = case_when(
        parfam == "10" ~ "Green/Ecologist",
        parfam == "20" ~ "Socialist or other left parties",
        parfam == "30" ~ "Social democratic parties",
        parfam == "40" ~ "Liberal parties",
        parfam == "50" ~ "Christian democratic parties",
        parfam == "60" ~ "Conservative parties",
        parfam == "70" ~ "Nationalist parties",
        parfam == "80" ~ "Agrarian parties",
        parfam == "90" ~ "Ethnic and regional parties",
        parfam == "95" ~ "Special issue parties",
        parfam == "98" ~ "Electoral alliances of diverse origin without dominant party",
        TRUE ~ parfam
    )) %>%
    select(-parfam) %>%
    mutate_at(vars(per501, per416, per410), ~ ifelse(is.na(.), as.numeric(0), .)) %>%
    mutate(growthprod = per416 - per410) %>%
    mutate(green_env_prot = case_when(
        parfamname == "Green/Ecologist" ~ per501,
        TRUE ~ as.numeric(NA)
    )) %>%
    mutate(green_anti_growth = case_when(
        parfamname == "Green/Ecologist" ~ per416,
        TRUE ~ as.numeric(NA)
    )) %>%
    mutate(green_productivity = case_when(
        parfamname == "Green/Ecologist" ~ per410,
        TRUE ~ as.numeric(NA)
    )) %>%
    mutate(green_growthprod = case_when(
        parfamname == "Green/Ecologist" ~ growthprod,
        TRUE ~ as.numeric(NA)
    )) %>%
    mutate_at(vars(per501, per416, per410, growthprod), ~ ifelse(parfamname == "Green/Ecologist",
        as.numeric(NA), .
    )) %>% # in the end I need the system average without the green parties
    mutate(year = as.numeric(format(as.Date(edate, format = "%d/%m/%Y"), "%Y"))) %>%
    filter(year >= 1995) %>%
    group_by(countryname, edate) %>%
    mutate_at(vars(pervote), ~ ifelse(is.na(.), as.numeric(1), .)) %>%
    mutate(per501 = weighted.mean(per501, w = pervote, na.rm = T)) %>%
    mutate(per416 = weighted.mean(per416, w = pervote, na.rm = T)) %>%
    mutate(per410 = weighted.mean(per410, w = pervote, na.rm = T)) %>%
    mutate(growthprod = weighted.mean(growthprod, w = pervote, na.rm = T)) %>%
    mutate(green_env_prot = weighted.mean(green_env_prot, w = pervote, na.rm = T)) %>%
    mutate(green_anti_growth = weighted.mean(green_anti_growth,
        w = pervote, na.rm = T
    )) %>%
    mutate(green_productivity = weighted.mean(green_productivity, w = pervote, na.rm = T)) %>%
    mutate(green_growthprod = weighted.mean(green_growthprod, w = pervote, na.rm = T)) %>%
    ungroup(.) %>%
    select(-party, -partyname, -pervote, -parfamname) %>%
    unique(.) %>%
    mutate_at(vars(contains("green_"), per501, per416, per410, growthprod), ~ ifelse(is.na(.), as.numeric(0), .)) %>%
    # group_by(countryname, year) %>%
    # mutate_at(vars(contains("green_"), per501, per416, per410, growthprod), ~ mean(., na.rm=TRUE)) %>%
    # ungroup(.) %>%
    # select(-edate) %>%
    unique()

fullgridMani <- expand.grid(
    countryname = unique(Manifestomod$countryname),
    year = seq(1999, 2021, 1)
)

Manifestomod <- full_join(fullgridMani, Manifestomod)

rm(fullgridMani, Manifesto)

Manifestomod <- Manifestomod %>%
    arrange(countryname, year) %>%
    group_by(countryname) %>%
    fill(edate, .direction = "down") %>%
    fill(per501, .direction = "down") %>%
    fill(per416, .direction = "down") %>%
    fill(per410, .direction = "down") %>%
    fill(green_anti_growth, .direction = "down") %>%
    fill(green_env_prot, .direction = "down") %>%
    fill(green_growthprod, .direction = "down") %>%
    fill(green_productivity, .direction = "down") %>%
    fill(growthprod, .direction = "down") %>%
    ungroup(.) %>%
    filter(year >= 1999) %>%
    filter(year <= 2021) %>%
    select(-per410, -green_productivity) %>%
    rename(
        env_prot_sys = per501,
        anti_growth_sys = per416,
        pos_growth_sys = growthprod,
        env_prot_green = green_env_prot,
        anti_growth_green = green_anti_growth,
        pos_growth_green = green_growthprod
    )


qogdata <- read_dta(paste0(path, "data/QoG/qog_ei_sept21.dta"))

# ccl_mitlpp - number of climate change laws
# bti_envc - "To what extent are environmental concerns effectively taken into account?""
# edgar_co2gdp - CO2 emmission by GDP
# edgar_co2pc - CO2 emmission per capita
# epi_cda - CO2 growth rate
# epcc_ch2 - Sum of all changes in policy (epcc_ch2)
# epcc_ch_kum - Cumulative sum of all policy-in-place items
# engo_nengo - 3.15.1 NumberofnationalENGOs
# epcc_susp_in2 - National environmental policy/Sustainable development plan introduction
# ess_impenv_m - "She/he strongly believes that people should care for nature. Looking after the environment is important to her/him".
# wdi_piesr - Policy and institutions for environmental sustainability measures the extent to which environmental policies foster the protection and sustainable use of natural resources and the management of pollution
# wvs_ameop - Active memberships in environmental organizations (%)
# wvs_epmip - Protecting environment vs economic growth

qogdata1 <- qogdata %>%
    select(
        cname, year, bti_envc, ccl_mitlpp, edgar_co2gdp, epcc_ch2, epcc_ch_kum,
        epcc_susp_in2, engo_nengo, epi_ghp, oecd_eps, ess_impenv_m,
        oecd_cctr_tot, oecd_cctr_gdp, oecd_etr_gdp, oecd_etr_tot, slaws_mit_ex_l3, slaws_mit_ex_lt,
        edgar_co2pc, epi_cda, ess_impenv_m, wdi_piesr, wvs_ameop, wvs_epmip
    ) %>%
    mutate(country_iso = countryname(cname, "iso2c")) %>%
    filter(country_iso %in% unique(PEA$country_iso)) %>%
    filter(year %in% seq(2000, 2021, 1)) %>%
    select(-cname) %>%
    arrange(country_iso, year)

table(is.na(qogdata1$bti_envc))
table(is.na(qogdata1$ccl_mitlpp)) #
table(is.na(qogdata1$ccl_nmitlp)) #
table(is.na(qogdata1$engo_nengo)) #
table(is.na(qogdata1$epcc_ch_kum))
table(is.na(qogdata1$edgar_co2gdp)) #
table(is.na(qogdata1$oecd_etr_gdp)) #
table(is.na(qogdata1$epi_cda)) # no NA
table(is.na(qogdata1$vparty_envseat))
table(is.na(qogdata1$wvs_deop))
table(is.na(qogdata1$slaws_mit_ex_l3))
table(is.na(qogdata1$wvs_ameop))

# qogdata1 <- qogdata1 %>%
#     select(-bti_envc, -ess_impenv_m, -wdi_piesr, -wvs_ameop, -wvs_epmip)

rm(qogdata)

oecd <- read_csv(paste0(path, "data/OECD/ERTR_10102023163658175.csv"))

oecd_dat <- oecd %>%
    filter(CAT == "TOT") %>%
    filter(DOM == "TOT") %>%
    select(Country, Year, Value) %>%
    rename(
        oecd_tax_env = Value,
        year = Year
    ) %>%
    mutate(country_iso = countryname(Country, "iso2c")) %>%
    select(-Country) %>%
    distinct()

rm(oecd)
# filter(country_iso %in% unique(PEA$country_iso))


# IUCN data with the env organizations

iucn <- read_csv(paste0(path, "data/IUCN/membership_info.csv"))

iucn_dat <- iucn %>%
    mutate(country_iso = countryname(country_name, "iso2c")) %>%
    mutate(year = str_sub(member_since, start = -4)) %>%
    # mutate(year = str_extract(member_since, "\\d{4}")) %>%
    mutate(year = as.numeric(year)) %>%
    filter(membership_category %in% c("NationalNGO", "InternationalNGO", "Affiliate")) %>% # With only nationalNGO, 210 missing, with InternationalNGO, Affiliate only 163 are missings.
    group_by(country_iso, year) %>%
    mutate(NGO_IUCN = n()) %>%
    ungroup(.) %>%
    select(country_iso, year, NGO_IUCN) %>%
    distinct() %>%
    # filter(country_iso %in% unique(PEA$country_iso)) %>%
    # filter(year >= 2000) %>%
    # filter(year <= 2020)
    arrange(country_iso, year) %>%
    group_by(country_iso) %>%
    mutate(NGO_IUCN = cumsum(NGO_IUCN)) %>%
    ungroup(.)

PEA_grid <- as.data.frame(expand.grid(
    country_iso = unique(PEA$country_iso),
    year = seq(2000, 2021, 1)
))

iucn_dat <- left_join(PEA_grid, iucn_dat, by = c("country_iso", "year"))

iucn_dat <- iucn_dat %>%
    arrange(country_iso, year) %>%
    group_by(country_iso) %>%
    fill(NGO_IUCN, .direction = "down") %>%
    ungroup(.) %>%
    mutate(NGO_IUCN = ifelse(is.na(NGO_IUCN), 0, NGO_IUCN)) %>%
    group_by(country_iso) %>%
    mutate(country_mean = mean(NGO_IUCN, na.rm = TRUE)) %>% # countries with no NGO member in IUCN will get a missing value
    ungroup() %>%
    mutate(NGO_IUCN = ifelse(country_mean == 0, as.numeric(NA), NGO_IUCN)) %>%
    select(-country_mean)

table(is.na(iucn_dat$NGO_IUCN))

rm(iucn)

# World Bank dataset, variable CO2 per capita

cocapita <- read_xls(paste0(path, "data/WorldBank/API_EN.ATM.CO2E.PC_DS2_en_excel_v2_4770549.xls"))

cocapita <- cocapita %>%
    mutate(country_iso = countryname(countryname, "iso2c")) %>%
    select(country_iso, contains("year"))


cocapita <- reshape2::melt(cocapita, id.vars = "country_iso", measure.vars = c(
    "year2000", "year2001", "year2002",
    "year2003", "year2004", "year2005", "year2006", "year2007", "year2008", "year2009",
    "year2010", "year2011", "year2012",
    "year2013", "year2014", "year2015", "year2016", "year2017", "year2018", "year2019"
))

cocapita$variable <- as.numeric(gsub("year", "", cocapita$variable))

cocapita <- cocapita %>%
    rename(year = variable) %>%
    rename(WBco2tonscapita = value)


# World Bank dataset, variable CO2 per GDP
co2gdp <- read_xls(paste0(path, "data/WorldBank/co2gdp.xls"))
co2gdp <- co2gdp %>%
    mutate(country_iso = countryname(countryname, "iso2c")) %>%
    select(country_iso, contains("year"))

co2gdp <- reshape2::melt(co2gdp, id.vars = "country_iso", measure.vars = c(
    "year2000", "year2001", "year2002",
    "year2003", "year2004", "year2005", "year2006", "year2007", "year2008", "year2009",
    "year2010", "year2011", "year2012",
    "year2013", "year2014", "year2015", "year2016", "year2017", "year2018", "year2019"
))

co2gdp$variable <- as.numeric(gsub("year", "", co2gdp$variable))

co2gdp <- co2gdp %>%
    rename(year = variable) %>%
    rename(co2gdp = value)

## Eurostat dataset, variable CO2 per capita (all years)
eurostatGHGtonscapita <- read_excel(paste0(path, "data/Eurostat/tonspercapita_eurostat.xlsx"))
eurostatGHGtonscapita <- eurostatGHGtonscapita %>%
    mutate(country_iso = countryname(countryname, "iso2c")) %>%
    select(country_iso, contains("year"))


eurostatGHGtonscapita <- reshape2::melt(eurostatGHGtonscapita, id.vars = "country_iso", measure.vars = c(
    "year2000", "year2001", "year2002",
    "year2003", "year2004", "year2005", "year2006", "year2007", "year2008", "year2009",
    "year2010", "year2011", "year2012",
    "year2013", "year2014", "year2015", "year2016", "year2017", "year2018", "year2019", "year2020"
))

eurostatGHGtonscapita$variable <- as.numeric(gsub("year", "", eurostatGHGtonscapita$variable))

eurostatGHGtonscapita <- eurostatGHGtonscapita %>%
    rename(year = variable) %>%
    rename(co2tonscapita = value)

eurostatGHGtonscapita$co2tonscapita[eurostatGHGtonscapita$country_iso == "GB" &
    eurostatGHGtonscapita$year == 2020 & eurostatGHGtonscapita$co2tonscapita == ":"] <- NA

eurostatGHGtonscapita$co2tonscapita <- as.numeric(as.character(eurostatGHGtonscapita$co2tonscapita))


# Parlgov Env position

env_pos_me <- read.csv(paste0(path, "data/manifesto/gov_env_prot_pos.csv")) %>%
    mutate(country_iso = countrycode(country_iso, "iso3c", "iso2c"))

parlgov_env_wratil <- read_dta(paste0(path, "data/CMP-ParlGov 2.2/parties_cmp2019_online_2.2.dta")) %>%
    mutate(country = as.character(as_factor(country))) %>%
    mutate(year = year(ymd(date))) %>%
    group_by(country, year) %>%
    mutate(gov_env_cmp = mean(gov_env_cmp, na.rm = TRUE)) %>%
    ungroup() %>%
    select(country, year, gov_env_cmp) %>%
    distinct() %>%
    mutate(country_iso = countrycode(country, "country.name", "iso2c")) %>%
    select(-country)

env_pos_me <- full_join(parlgov_env_wratil, env_pos_me, by = c("country_iso", "year")) %>%
    mutate(gov_env_cmp = ifelse(is.na(gov_env_cmp), gov_env_prot_pos, gov_env_cmp)) %>%
    select(-gov_env_prot_pos) %>%
    na.omit()

# GB 2020 missing

# V-dem data

vdemdata <- vdem %>%
    select(country_name, year, v2x_divparctrl) %>%
    na.omit() %>%
    mutate(country_iso = countryname(country_name, "iso2c")) %>%
    select(-country_name) %>%
    rename(divparctrl = v2x_divparctrl)

# Dissatisfaction with democracy

satdem <- read.csv(paste0(path, "data/demsat/demosatif.csv")) %>%
    group_by(countrycode, year) %>%
    mutate_at(vars(scale, dummy), ~ mean(., na.rm = TRUE)) %>%
    ungroup() %>%
    select(year, scale, dummy, countrycode) %>%
    unique() %>%
    rename(country_string = countrycode) %>%
    mutate(country_iso = countrycode(country_string, "iso3c", "iso2c")) %>%
    rename_at(vars(scale, dummy), ~ paste0("satdem_", .)) %>%
    select(-country_string)

# Exposure to the news

factiva <- read.csv(paste0(path, "data/factiva/news_compilation.csv")) %>%
    filter(year >= 2000 & year <= 2021) %>%
    group_by(country) %>%
    mutate(total_nr_art = sum(nbr_articles, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(news_share = nbr_articles / total_nr_art * 100) %>%
    mutate(country_iso = countrycode(country, "country.name", "iso2c")) %>%
    select(country_iso, year, news_share)

# Raw EPI: 100% - | Inflation Rate | - Unemployment Rate - Budget Deficit/GDP + Change in Real GDP

econ_indic <- read.csv(paste0(path, "data/economic indicators/european_data.csv")) %>%
    mutate(econ_indic = 100 - abs(inflation) - unemployment - budget_deficit + gdp_growth) %>%
    mutate(econ_indic = as.numeric(econ_indic)) %>%
    select(country, year, econ_indic) %>%
    mutate(country_iso = countrycode(country, "iso3c", "iso2c")) %>%
    select(-country)

# Natural disasters

natdis <- read.csv(paste0(path, "data/natural_disasters/natural_disasters.csv")) %>%
    mutate(country_iso = countrycode(ISO, "iso3c", "iso2c")) %>%
    mutate(year = as.numeric(Year)) %>%
    mutate(nr_disasters = as.numeric(Count)) %>%
    select(country_iso, year, nr_disasters)

# Merge PEA and parlgovgreen based on country_iso and date/election_date

table(PEA$issue_anti_europe)

dput(colnames(PEA)[grepl("issue", colnames(PEA))])
dput(colnames(PEA)[grepl("actor", colnames(PEA))])

PEA_yearly <- PEA %>%
    mutate(only_env = ifelse(
        issue_environment == "yes" & issue_econ_private == "no" & issue_econ_public == "no" &
            issue_cult_lib == "no" & issue_political == "no" & issue_regional == "no" & issue_cult_cons == "no" &
            issue_xeno == "no" & issue_other == "no" & issue_anti_europe == "no" & issue_anti_immig == "no" &
            issue_pro_europe == "no" & issue_pro_immig == "no" & issue_corona_anti_restric == "no" & issue_corona_pro_restric == "no" &
            issue_education == "no" & issue_healthcare == "no",
        as.numeric(1), as.numeric(0)
    )) %>%
    mutate(no_org = ifelse((actor_party_left == "no" & actor_party_right == "no" & actor_party_unknown == "no" &
        actor_union_private == "no" & actor_union_public == "no" & actor_union_both == "no" &
        actor_union_unknown == "no" &
        actor_party_far_left == "no" & actor_party_far_right == "no" & actor_party_main_left == "no" &
        actor_party_main_right == "no" & actor_prof_org == "no" & actor_non_prof_org == "no") &
        (actor_dontknow == "yes" | actor_missing == "yes" | actor_general_citizens == "yes" |
            actor_group_pens == "yes" | actor_group_stud == "yes" |
            actor_group_occup == "yes" | actor_group_other == "yes" | actor_other == "yes" |
            actor_group_lgbt == "yes" | actor_group_refugee == "yes" | actor_group_women == "yes") &
        issue_environment == "yes",
    as.numeric(1), as.numeric(0)
    )) %>%
    mutate(demo = ifelse(action_form == "demonstrations" & issue_environment == "yes", 1, 0)) %>%
    mutate(issue_environment = ifelse(issue_environment == "yes", as.numeric(1), as.numeric(0))) %>%
    group_by(country_iso, year) %>%
    mutate(total_env_protest = sum(weighted_event * issue_environment, na.rm = TRUE)) %>%
    mutate(total_env_part = sum(weighted_part_all * issue_environment, na.rm = TRUE)) %>%
    mutate(share_env_protest = total_env_protest / sum(weighted_event, na.rm = TRUE)) %>%
    mutate(share_env_part = total_env_part / sum(weighted_part_all, na.rm = TRUE)) %>%
    mutate(share_demo = sum(weighted_event * demo, na.rm = TRUE) / total_env_protest) %>%
    mutate(total_nr_other_protest = sum(weighted_event, na.rm = TRUE) - total_env_protest) %>%
    mutate(total_nr_other_part = sum(weighted_part_all, na.rm = TRUE) - total_env_part) %>%
    mutate(sum_only_env = sum(weighted_event * only_env, na.rm = TRUE)) %>%
    mutate(share_other_issues = 1 - (sum_only_env / total_env_protest)) %>%
    mutate(sum_no_org = sum(weighted_event * no_org, na.rm = TRUE)) %>%
    mutate(share_organized = 1 - (sum_no_org / total_env_protest)) %>%
    ungroup(.) %>%
    select(starts_with("share_"), starts_with("total_"), country_iso, year) %>%
    distinct()

table(PEA_yearly$share_organized)

full_grid <- as.data.frame(expand.grid(
    country_iso = unique(PEA$country_iso),
    year = unique(PEA$year)
))

PEA_yearly <- left_join(full_grid, PEA_yearly) %>%
    mutate_at(vars(starts_with("share_"), starts_with("total_")), ~ ifelse(is.na(.), 0, .))

rm(PEA, full_grid)

protest_countries <- unique(PEA_yearly$country_iso)

parlgov <- parlgovgreen %>%
    select(country_name, election_year, greenvote_share, green_seat_share) %>%
    mutate(country_iso = countryname(country_name, "iso2c")) %>%
    filter(country_iso %in% protest_countries) %>%
    select(-country_name) %>%
    rename(year = election_year) %>%
    group_by(country_iso, year) %>%
    summarise_at(vars(greenvote_share, green_seat_share), ~ mean(., na.rm = TRUE)) %>%
    ungroup(.) %>%
    distinct(.)

PEA_yearly <- left_join(PEA_yearly, parlgov)

rm(parlgov, parlgovgreen)

PEA_yearly <- left_join(PEA_yearly, parlgov_cabinet)

rm(parlgov_cabinet)

# Merge PEA and Manifestomod based on country_iso and date/election_date
# it should give 38359 rows in the end

Manifestomod <- Manifestomod %>%
    mutate(country_iso = countryname(countryname, "iso2c")) %>%
    filter(country_iso %in% protest_countries) %>%
    select(-countryname, -edate) %>%
    group_by(year, country_iso) %>%
    summarise_all(~ mean(., na.rm = TRUE)) %>%
    ungroup(.) %>%
    distinct() %>%
    mutate_at(
        vars(starts_with("env_"), starts_with("anti_"), starts_with("pos_")),
        ~ ifelse(is.na(.), 0, .)
    )

head(Manifestomod)

## environmental performance index

# source(paste0(path, "data/env_performance_index/env_performance.R"))

epi_index <- read.csv(paste0(path, "data/env_performance_index/env_perf_index.csv")) %>% 
  select(-matches("_weight")) %>% 
  select(iso, country, Year, EPI, climate_change_index, env_health_index, eco_vitality_index) %>% 
  mutate(country_iso = countryname(country, "iso2c")) %>% 
  select(-iso, -country) %>% 
  rename(year=Year)

PEA_yearly <- left_join(PEA_yearly, epi_index, by = c("country_iso", "year"))
table(is.na(PEA_yearly$EPI))
rm(epi_index)

PEA_yearly <- left_join(PEA_yearly, Manifestomod)

rm(Manifestomod, protest_countries)

# merge the yearly dataset

PEA_yearly <- left_join(PEA_yearly, qogdata1, by = c("country_iso", "year"))
rm(qogdata1)

PEA_yearly <- left_join(PEA_yearly, co2gdp, by = c("country_iso", "year"))
rm(co2gdp)

PEA_yearly <- left_join(PEA_yearly, cocapita, by = c("country_iso", "year"))
rm(cocapita)

PEA_yearly <- left_join(PEA_yearly, eurostatGHGtonscapita, by = c("country_iso", "year"))
rm(eurostatGHGtonscapita)

PEA_yearly <- left_join(PEA_yearly, vdemdata, by = c("country_iso", "year"))
rm(vdemdata)

PEA_yearly <- left_join(PEA_yearly, satdem, by = c("country_iso", "year"))
rm(satdem)

PEA_yearly <- left_join(PEA_yearly, factiva, by = c("country_iso", "year"))
rm(factiva)

PEA_yearly <- left_join(PEA_yearly, env_pos_me, by = c("country_iso", "year"))
rm(env_pos_me)

PEA_yearly$gov_env_cmp[PEA_yearly$country_iso == "AT" & PEA_yearly$year == 2019] <- PEA_yearly$gov_env_cmp[PEA_yearly$country_iso == "AT" & PEA_yearly$year == 2018]

PEA_yearly <- left_join(PEA_yearly, econ_indic, by = c("country_iso", "year"))
rm(econ_indic)

PEA_yearly <- left_join(PEA_yearly, iucn_dat, by = c("country_iso", "year"))
rm(iucn_dat)

PEA_yearly <- left_join(PEA_yearly, oecd_dat, by = c("country_iso", "year"))
rm(oecd_dat)

table(PEA_yearly$country_iso[is.na(PEA_yearly$oecd_tax_env)], PEA_yearly$year[is.na(PEA_yearly$oecd_tax_env)])

PEA_yearly$oecd_tax_env[PEA_yearly$country_iso == "CH" & PEA_yearly$year == 2021] <- PEA_yearly$oecd_tax_env[PEA_yearly$country_iso == "CH" & PEA_yearly$year == 2020]

PEA_yearly$oecd_tax_env[PEA_yearly$country_iso == "GR" & PEA_yearly$year == 2021] <- PEA_yearly$oecd_tax_env[PEA_yearly$country_iso == "GR" & PEA_yearly$year == 2020]


table(PEA_yearly$country_iso[is.na(PEA_yearly$oecd_tax_env)], PEA_yearly$year[is.na(PEA_yearly$oecd_tax_env)])

PEA_yearly <- left_join(PEA_yearly, natdis, by = c("country_iso", "year"))
rm(natdis)

table(PEA_yearly$gov_env_cmp, useNA = "always")

table(
    PEA_yearly$year[is.na(PEA_yearly$gov_env_cmp)],
    PEA_yearly$country_iso[is.na(PEA_yearly$gov_env_cmp)]
)

PEA_yearly$satdem_scale[PEA_yearly$year == 2008] <- (PEA_yearly$satdem_scale[PEA_yearly$year == 2007] +
    PEA_yearly$satdem_scale[PEA_yearly$year == 2009]) / 2
PEA_yearly$satdem_dummy[PEA_yearly$year == 2008] <- (PEA_yearly$satdem_dummy[PEA_yearly$year == 2007] +
    PEA_yearly$satdem_dummy[PEA_yearly$year == 2009]) / 2

save(PEA_yearly, file = paste0(path, "data/PEA_yearly_16052024.RData"))

table(PEA_yearly$green_in_gov, useNA = "always")

table(
    PEA_yearly$country_iso[PEA_yearly$total_env_protest > 0 & PEA_yearly$share_demo == 0],
    PEA_yearly$year[PEA_yearly$total_env_protest > 0 & PEA_yearly$share_demo == 0]
)
